library(knitr)
library(TimeSeriesAnalysis)
## Loading required package: BiocFileCache
## Loading required package: dbplyr
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
## Warning: replacing previous import 'SummarizedExperiment::shift' by
## 'data.table::shift' when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::Position' by
## 'ggplot2::Position' when loading 'TimeSeriesAnalysis'
##
##
##
## Warning: replacing previous import 'BiocGenerics::residuals' by
## 'stats::residuals' when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'SummarizedExperiment::start' by
## 'stats::start' when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'SummarizedExperiment::end' by 'stats::end'
## when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::weights' by 'stats::weights'
## when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::IQR' by 'stats::IQR' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::var' by 'stats::var' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::xtabs' by 'stats::xtabs' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::sd' by 'stats::sd' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::density' by 'stats::density'
## when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::mad' by 'stats::mad' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'GenomicRanges::update' by 'stats::update'
## when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'data.table::shift' by 'tictoc::shift' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'AnnotationDbi::tail' by 'utils::tail' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'AnnotationDbi::head' by 'utils::head' when
## loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'BiocGenerics::relist' by 'utils::relist'
## when loading 'TimeSeriesAnalysis'
## Warning: replacing previous import 'data.table::melt' by 'reshape2::melt' when
## loading 'TimeSeriesAnalysis'
library(SummarizedExperiment)
library(ggplot2)
library(svglite)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::ident() masks dbplyr::ident()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ dplyr::sql() masks dbplyr::sql()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Cairo)
library(gprofiler2)
library(stringr)
library(DESeq2) # BiocManager::install('DESeq2')
library(limma) # BiocManager::install('limma')
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:DESeq2':
##
## plotMA
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
library(ggpubr)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:ggpubr':
##
## get_legend
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
##
## The following object is masked from 'package:dplyr':
##
## select
library(DEGreport) #BiocManager::install("DEGreport")
library(glmpca)
library(PoiClaClu)
library(RColorBrewer)
library(pheatmap)
#library(stringr)
#library(readxl)
#library(variancePartition) # BiocManager::install('variancePartition')
#library(doParallel)
#library(ggfortify) # install.packages("ggfortify")
#library(Biobase)
#library(arrayQualityMetrics)
library(ggvenn) # install.packages("ggvenn")
## Loading required package: grid
#if (!require(devtools)) install.packages("devtools")
#devtools::install_github("gaospecial/ggVennDiagram")
#library(ggVennDiagram)
## ggplot theme
theme_custom <- function() {
theme_bw(base_size = 10) %+replace% #, base_family = "Arial"
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_rect(color = "black", fill = NA),
legend.background = element_rect(fill = NA, colour = NA),
axis.text.x = element_text(angle=45, hjust=1, vjust = 1)
)
}
## ggplot labeller
colnames <- c(
`A` = "abdominal",
`Pp` = "pleural & pedal"
)
global_labeller <- labeller(
tissue = colnames,
.default = "label_parsed"
)
volcanoplot_alt <- function(DE_res,genes_of_interest=c(),filter_choice='padj',l2FC_thresh=0,
p_thresh=0.05,plot_title="Volcano plot",label_top_n=0,show_non_sig_interest=TRUE){
DE_res$Significance <- NA
sigUp <- which(DE_res$log2FoldChange>l2FC_thresh & DE_res[filter_choice]<p_thresh)
DE_res[ sigUp, "Significance"] <- paste0("up-reg with ",filter_choice,"<",p_thresh," (",length(sigUp),")")
#subset and fill columns based on significant downregulation
sigDown <- which(DE_res$log2FoldChange<(-l2FC_thresh) & DE_res[filter_choice]<p_thresh)
DE_res[ sigDown, "Significance"] <- paste0("down-reg with ",filter_choice,"<",p_thresh," (",length(sigDown),")")
#subset and fill columns based on non-significant upregulation
nonSig <- which(DE_res[filter_choice]>=p_thresh)
DE_res[ nonSig, "Significance"] <- paste0("non-significant with ",filter_choice,">",p_thresh," (",length(nonSig),")")
lowSig <- which(abs(DE_res$log2FoldChange)<=l2FC_thresh & DE_res[filter_choice]<p_thresh)
DE_res[ lowSig, "Significance"] <- paste0("low-regulation with ",filter_choice,"<",p_thresh," (",length(lowSig),")")
#Find the lowest significant point to draw a horizontal ax-line at that point on the axis
#Sort using the filter choice
DE_res <- DE_res[order(DE_res[[filter_choice]]),]
#Find the 'first' non significant gene
gene_for_sig_line<-DE_res$gene[DE_res$Significance==paste0("non-significant with ",filter_choice,">",p_thresh," (",length(nonSig),")")][1]
#Create a dataframe of necessary columns and convert padj to -log10 (as is it's form in the plot)
find_lowest_sig<-DE_res[,c('gene','padj')]
find_lowest_sig$padj<- -log10(find_lowest_sig$padj)
#Extract value based on the gene.
lowest_sig<-find_lowest_sig$padj[find_lowest_sig$gene==gene_for_sig_line]
labs_data <- DE_res[DE_res$gene %in% genes_of_interest, ]
if (show_non_sig_interest==FALSE){
labs_data <- labs_data[labs_data[filter_choice]<p_thresh & abs(labs_data$log2FoldChange)>l2FC_thresh,]
}
if (label_top_n>0){
top_res <- DE_res[order(DE_res[filter_choice]),]
labs_data_top <- top_res[1:label_top_n,]
}else{
labs_data_top <- DE_res[FALSE,]
}
v_plot <- ggplot(DE_res, aes(x = log2FoldChange, y = -log10(padj), color=Significance)) +
geom_point(size=0.8) +
geom_hline(yintercept=lowest_sig, linetype="dashed", color = "black")+
xlab(expression('Log'[2]*'Fold Change')) +
ylab(expression('-log10(padj)')) +
geom_vline(xintercept = l2FC_thresh, linetype="dashed", color = "black") +
geom_vline(xintercept = -l2FC_thresh, linetype="dashed", color = "black") +
ggrepel::geom_label_repel(data = labs_data, mapping = aes(label = gene),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"),
force = 1, segment.colour = 'black',show.legend = FALSE,
label.size = 1)+
ggrepel::geom_label_repel(data = labs_data_top, mapping = aes(label = gene),
box.padding = unit(0.35, "lines"),
point.padding = unit(0.3, "lines"),
force = 0.5, colour = 'black',show.legend = FALSE) +
scale_color_manual(breaks = c(paste0("up-reg with ",filter_choice,"<",p_thresh," (",length(sigUp),")"),
paste0("down-reg with ",filter_choice,"<",p_thresh," (",length(sigDown),")"),
paste0("non-significant with ",filter_choice,">",p_thresh," (",length(nonSig),")"),
paste0("low-regulation with ",filter_choice,"<",p_thresh," (",length(lowSig),")")),
values=c("#B31B21","#1465AC","darkgray","green")) +
guides(color = guide_legend(override.aes = list(size = 7)))+
ggtitle(plot_title) +
theme_light()+
theme(text = element_text(size = 10),
plot.title = element_text(size = 20),
legend.text = element_text(size = 10))
return(v_plot)
}
# STAR quantMode geneCounts output:
#column 1: gene ID
#column 2: counts for unstranded RNA-seq
#column 3: counts for the 1st read strand aligned with RNA (htseq-count option -s yes)
#column 4: counts for the 2nd read strand aligned with RNA (htseq-count option -s reverse)
sampleNames <- list.files(path = glue::glue("gene_expression/data/GeneCounts_merged/"), pattern = "*ReadsPerGene.out.tab") %>%
stringr::str_split_fixed("_", n = 4) %>%
tibble::as_tibble() %>%
dplyr::mutate(Name = V1) %>%
dplyr::select(Name) %>%
purrr::flatten_chr()
## Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
## `.name_repair` is omitted as of tibble 2.0.0.
## ℹ Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
geneIDs <- list.files(path = glue::glue("gene_expression/data/GeneCounts_merged/"), pattern = "*ReadsPerGene.out.tab", full.names = T)[1] %>%
data.table::fread(select = 1) %>%
purrr::flatten_chr()
countMatrix <- list.files(path = glue::glue("gene_expression/data/GeneCounts_merged/"), pattern = "*ReadsPerGene.out.tab", full.names = T) %>%
purrr::map_dfc(data.table::fread, select = 3, data.table = F) %>%
magrittr::set_colnames(sampleNames) %>%
magrittr::set_rownames(geneIDs)
## New names:
## • `V3` -> `V3...1`
## • `V3` -> `V3...2`
## • `V3` -> `V3...3`
## • `V3` -> `V3...4`
## • `V3` -> `V3...5`
## • `V3` -> `V3...6`
## • `V3` -> `V3...7`
## • `V3` -> `V3...8`
## • `V3` -> `V3...9`
## • `V3` -> `V3...10`
## • `V3` -> `V3...11`
## • `V3` -> `V3...12`
## • `V3` -> `V3...13`
## • `V3` -> `V3...14`
## • `V3` -> `V3...15`
## • `V3` -> `V3...16`
## • `V3` -> `V3...17`
## • `V3` -> `V3...18`
## • `V3` -> `V3...19`
## • `V3` -> `V3...20`
## • `V3` -> `V3...21`
## • `V3` -> `V3...22`
## • `V3` -> `V3...23`
## • `V3` -> `V3...24`
## • `V3` -> `V3...25`
## • `V3` -> `V3...26`
## • `V3` -> `V3...27`
## • `V3` -> `V3...28`
## • `V3` -> `V3...29`
## • `V3` -> `V3...30`
## • `V3` -> `V3...31`
## • `V3` -> `V3...32`
## • `V3` -> `V3...33`
## • `V3` -> `V3...34`
## • `V3` -> `V3...35`
## • `V3` -> `V3...36`
## • `V3` -> `V3...37`
## • `V3` -> `V3...38`
## • `V3` -> `V3...39`
## • `V3` -> `V3...40`
## • `V3` -> `V3...41`
## • `V3` -> `V3...42`
## • `V3` -> `V3...43`
## • `V3` -> `V3...44`
## • `V3` -> `V3...45`
## • `V3` -> `V3...46`
## • `V3` -> `V3...47`
## • `V3` -> `V3...48`
## • `V3` -> `V3...49`
## • `V3` -> `V3...50`
## • `V3` -> `V3...51`
## • `V3` -> `V3...52`
## • `V3` -> `V3...53`
## • `V3` -> `V3...54`
## • `V3` -> `V3...55`
## • `V3` -> `V3...56`
## • `V3` -> `V3...57`
## • `V3` -> `V3...58`
## • `V3` -> `V3...59`
## • `V3` -> `V3...60`
## • `V3` -> `V3...61`
## • `V3` -> `V3...62`
## • `V3` -> `V3...63`
## • `V3` -> `V3...64`
## • `V3` -> `V3...65`
## • `V3` -> `V3...66`
## • `V3` -> `V3...67`
## • `V3` -> `V3...68`
## • `V3` -> `V3...69`
## • `V3` -> `V3...70`
## • `V3` -> `V3...71`
## • `V3` -> `V3...72`
## • `V3` -> `V3...73`
## • `V3` -> `V3...74`
## • `V3` -> `V3...75`
## • `V3` -> `V3...76`
## • `V3` -> `V3...77`
## • `V3` -> `V3...78`
## • `V3` -> `V3...79`
## • `V3` -> `V3...80`
## • `V3` -> `V3...81`
## • `V3` -> `V3...82`
## • `V3` -> `V3...83`
countMatrix <- countMatrix[-c(1:4),]
# Import sample metadata
sdat0 <- read_csv("gene_expression/data/sample_metadata.csv") %>%
filter(sample_ID %in% sampleNames)
## Rows: 83 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): animal_ID, treatment, tissue, sample_ID
## dbl (1): batch
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
sdat0$treatment <- gsub("C", "Control", sdat0$treatment)
sdat0$batch <- gsub(60, "Lab_cross", sdat0$batch)
sdat0$batch <- gsub(71, "Wild_cross", sdat0$batch)
sdat <- sdat0[sdat0$batch == "Wild_cross",] %>%
arrange(sample_ID) %>% # order rows by sample name
column_to_rownames(var = "sample_ID") # set sample name to rownames
countMatrix <- countMatrix[, colnames(countMatrix) %in% rownames(sdat)]
#load("output/counts_and_meta_Wild.RData")
# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = countMatrix,
colData = sdat,
design = ~ treatment + tissue)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
### Remove genes counted zero times
dds <- dds[ rowSums(counts(dds)) > 0, ]
# GLM PCA
gpca <- glmpca(counts(dds), L=2)
## Warning in glmpca(counts(dds), L = 2): Reached maximum number of iterations
## (1000) without numerical convergence. Results may be unreliable.
gpca.dat <- gpca$factors
gpca.dat$Sample <- dds$animal_ID
gpca.dat$Treatment <- dds$treatment
gpca.dat$tissue <- dds$tissue
pglmpca <- ggplot(gpca.dat, aes(x = dim1, y = dim2, color = Treatment, label = Sample, shape = tissue)) +
geom_point(size =3,) + coord_fixed() +
ggtitle("glmpca - Generalized PCA, ")
pglmpca
# One of the 7 day hypoxia samples is an outlier
poisd <- PoissonDistance(t(counts(dds)))
samplePoisDistMatrix <- as.matrix(poisd$dd)
colnames(samplePoisDistMatrix) <- rownames(colData(dds))
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(samplePoisDistMatrix,
clustering_distance_rows = poisd$dd,
clustering_distance_cols = poisd$dd,
col = colors)
# AC175A is an outlier. Will exclude
sdat <- sdat0[sdat0$batch == "Wild_cross",] %>%
filter(sample_ID != "AC175A") %>%
arrange(sample_ID) %>% # order rows by sample name
column_to_rownames(var = "sample_ID")
countMatrix <- countMatrix[, colnames(countMatrix) %in% rownames(sdat)]
dir.create(paste0('gene_expression/data/','Wild_PlPed_vs_Abd'))
## Warning in dir.create(paste0("gene_expression/data/", "Wild_PlPed_vs_Abd")):
## 'gene_expression/data/Wild_PlPed_vs_Abd' already exists
sdat1 <- sdat0 %>%
mutate(group = sdat0$tissue,
group = gsub("A", "Abdominal_ganglion", group),
group = gsub("Pp", "PleuralPedal_ganglia", group),
sample = sdat0$sample_ID) %>%
mutate(timepoint = sdat0$treatment,
timepoint = gsub("Control", 0, timepoint),
timepoint = gsub("Hyp_T2h", 1, timepoint),
timepoint = gsub("Hyp_T6h", 2, timepoint),
timepoint = gsub("Reox", 3, timepoint),
timepoint = gsub("LtH_6", 4, timepoint),
timepoint = gsub("LtH_7", 5, timepoint),
timepoint = gsub("Recovery", 6, timepoint)) %>%
filter(batch == "Wild_cross") %>%
filter(sample != "AC175A") %>%
group_by(group, timepoint) %>%
mutate(group_initials = case_when(
group == "Abdominal_ganglion" ~ "AG",
group == "PleuralPedal_ganglia" ~ "PpG"),
replicate = paste0(group_initials, "_", row_number())) %>%
ungroup() %>%
dplyr::select(sample, replicate, timepoint, group, treatment)
write_csv(sdat1, "gene_expression/data/Wild_PlPed_vs_Abd/sample_meta.csv")
# Directory where files will be saved
dir.create(paste0('gene_expression/data/','Wild_PlPed_vs_Abd/counts'))
## Warning in dir.create(paste0("gene_expression/data/",
## "Wild_PlPed_vs_Abd/counts")): 'gene_expression/data/Wild_PlPed_vs_Abd/counts'
## already exists
output_dir <- "/gene_expression/data/Wild_PlPed_vs_Abd/counts/" # Specify your directory
# Loop through each sample (column) in the matrix
for (sample in colnames(countMatrix[,sdat1$sample])) {
# Subset the matrix to get the data for the current sample (column)
sample_data <- countMatrix[, sample, drop = FALSE]
# Save the sample data to a tab-delimited file
write.table(sample_data, file = paste0(getwd(),output_dir, sample, ".counts"), sep = "\t", col.names = FALSE, quote = FALSE)
}
#Give names to saved object and name of results repository
dir.create('gene_expression/output/TS_output')
## Warning in dir.create("gene_expression/output/TS_output"):
## 'gene_expression/output/TS_output' already exists
dir.create('gene_expression/output/TS_output/Wild_PlPed_vs_Abd')
## Warning in dir.create("gene_expression/output/TS_output/Wild_PlPed_vs_Abd"):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd' already exists
name_result_folder<-'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/'
obj_name<-'timeSeries_obj_Wild_PlPed_vs_Abd.Rdata'
graphic_vector<-c("#e31a1c","#1f78b4") #Pre-set colors for the groups
names(graphic_vector)<-c('PleuralPedal_ganglia','Abdominal_ganglion')
# Object creation -------------------
TS_object <- new('TimeSeries_Object',
group_names=c('PleuralPedal_ganglia','Abdominal_ganglion'),group_colors=graphic_vector,
DE_method='DESeq2',DE_p_filter='padj',DE_p_thresh=0.05,
DE_l2fc_thresh=0,PART_l2fc_thresh=2,sem_sim_org='org.Hs.eg.db',Gpro_org='hsapiens')
TS_object <- add_experiment_data(TS_object,
sample_dta_path="gene_expression/data/Wild_PlPed_vs_Abd/sample_meta.csv",
count_dta_path="gene_expression/data/Wild_PlPed_vs_Abd/counts/")
TS_object <- add_semantic_similarity_data(TS_object,"BP")
## Warning in godata(sem_org, ont = ont_sem_sim, computeIC = TRUE): use 'annoDb'
## instead of 'OrgDb'
## preparing gene to GO mapping data...
## preparing IC data...
# DESeq2 -------------------
TS_object <- normalize_timeSeries_with_deseq2(time_object=TS_object)
## converting counts to integer mode
#Perform conditional differential gene expression analysis
TS_object<-conditional_DE_wrapper(TS_object)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#Perform temporal differential gene expression analysis
TS_object<-temporal_DE_wrapper(TS_object,do_all_combinations=TRUE)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Subset temporal comparisons to only relevant ones
temp_to_keep <- c("TP_1_vs_TP_0", "TP_2_vs_TP_0", "TP_4_vs_TP_0", "TP_5_vs_TP_0", "TP_3_vs_TP_2", "TP_6_vs_TP_2")
TS_object@DE_results$temporal <- TS_object@DE_results$temporal[temp_to_keep]
# PART -------------------
#Extract genes for PART clustering based on defined log(2)foldChange threshold
signi_genes<-select_genes_with_l2fc(TS_object)
sample_data<-exp_sample_data(TS_object)
#Use all samples, but implement a custom order. In this case it is reversed
samps_2<-sample_data$sample[sample_data$group==TS_object@group_names[2]]
samps_1<-sample_data$sample[sample_data$group==TS_object@group_names[1]]
#Create the matrix that will be used for PART clustering
TS_object<-prep_counts_for_PART(object=TS_object,target_genes=signi_genes,
scale=TRUE,target_samples=c(samps_2,samps_1))
#Sets a seed for reproducibility
set.seed('123456')
TS_object<-compute_PART(TS_object,part_recursion=120,part_min_clust=50,
custom_seed=123456,dist_param="euclidean", hclust_param="average")
## computing PART clusters
## There are 2291 in the matrix. If the computing time takes too long consider increasing the PART_l2fc_threshold.
# Gprofiler -------------------
TS_object<-run_gprofiler_PART_clusters(TS_object) #Run the gprofiler analysis with modified function to get human orthologs
## running Gprofiler on PART clusters
## Gprofiler for C1
## Gprofiler for C2
## Gprofiler for C3
## Gprofiler for C4
## Gprofiler for C5
## Gprofiler for C6
## Gprofiler for C7
# PCA -------------------
TS_pca<-plot_PCA_TS(TS_object,DE_type='all')
## using ntop=500 top features by variance
ggsave(paste0(name_result_folder,"PCA_plot.png"),dpi=300,width=21, height=19, units='cm',plot=TS_pca)
# DESeq2 results -------------------
#Set genes of interest (optional) - can be left as c()
genes_of_interest <- c('LOC101850742','LOC101857241','LOC101857709','LOC101851246','LOC101857475','LOC101849091', "COX1","ND6","ND5","ND1","NDL4","CYTB","COX2","ATP8","ATP6","ND3","ND4","COX3","ND2")
#Run wrappers twice once for conditional and another for temporal
plot_wrapper_DE_results(object=TS_object,DE_type='conditional',genes_of_interest=genes_of_interest,results_folder=name_result_folder)
## Warning in dir.create(main_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional'
## already exists
## Creating summary heat map
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_0
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_0'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_1
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_1'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_2
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_2'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_4
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_4'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_5
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_5'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_6
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_6'
## already exists
## creating results for PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_3
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_conditional/PleuralPedal_ganglia_vs_Abdominal_ganglion_TP_3'
## already exists
plot_wrapper_DE_results(object=TS_object,DE_type='temporal',genes_of_interest=genes_of_interest,results_folder=name_result_folder)
## Warning in dir.create(main_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal'
## already exists
## Creating summary heat map
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 columns You can control `use_raster` argument by explicitly
## setting TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 columns You can control `use_raster` argument by explicitly
## setting TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## creating results for TP_1_vs_TP_0
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_1_vs_TP_0'
## already exists
## creating results for TP_2_vs_TP_0
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_2_vs_TP_0'
## already exists
## creating results for TP_4_vs_TP_0
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_4_vs_TP_0'
## already exists
## creating results for TP_5_vs_TP_0
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_5_vs_TP_0'
## already exists
## creating results for TP_3_vs_TP_2
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_3_vs_TP_2'
## already exists
## creating results for TP_6_vs_TP_2
## Warning in dir.create(save_path):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/DE_results_temporal/TP_6_vs_TP_2'
## already exists
# PART results -------------------
dir.create(paste0(name_result_folder,'PART_results')) #create the directory to store results
## Warning in dir.create(paste0(name_result_folder, "PART_results")):
## 'gene_expression/output/TS_output/Wild_PlPed_vs_Abd/PART_results' already
## exists
PART_heat_map(TS_object,paste0(name_result_folder,'PART_results/PART_heat')) #Create a summary heatmap
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
ts_data<-calculate_cluster_traj_data(TS_object,scale_feat=TRUE) #Calculate scaled gene values for genes of clusters
mean_ts_data<-calculate_mean_cluster_traj(ts_data) #Calculate the mean scaled values for each cluster
#Function which determines the number of SVG files to plot all cluster trajectories
wrapper_cluster_trajectory(TS_object,ts_data,mean_ts_data,yaxis_name='scaled mean counts',log_TP=F,plot_name=paste0(name_result_folder,'/PART_results/Ctraj'))
# Gprofiler results -------------------
#Create standard gprofiler results
gpro_res<-gprofiler_cluster_analysis(TS_object,'All',save_path = name_result_folder)
GO_clusters<-gpro_res[['GO_df']]
sem_dta<-slot(TS_object,'sem_list')
found_clusters<-find_clusters_from_termdist(GO_clusters,sem_dta)
## [1] "No clusters found"
#Plot and save MDS and clustered MDS
MDS_plots<-wrapper_MDS_and_MDS_clusters(GO_clusters,sem_dta,'BP',target_dir=paste0(name_result_folder,'gprofiler_results/'),return_plot=TRUE)
#Create dotplots
GO_dotplot_wrapper(TS_object,file_loc=name_result_folder,target_ontology='GO:BP',top_n=10,custom_width=10)
GO_dotplot_wrapper(TS_object,file_loc=name_result_folder,target_ontology='GO:MF',top_n=10,custom_width=10)
GO_dotplot_wrapper(TS_object,file_loc=name_result_folder,target_ontology='GO:CC',top_n=10,custom_width=10)
# Ancestor queries results -------------------
ancestor_list <- c("GO:0001666","GO:0071456","GO:0002250","GO:0002376","GO:0002283","GO:0002252", "GO:0002684","GO:0008152","GO:0006950","GO:0030324","GO:0042990","GO:0006950","GO:0007610","GO:0008285","GO:0043522","GO:0010467","GO:0001501")
GOs_ancestors_clust<-find_relation_to_ancestors(target_ancestors=ancestor_list,GOs_to_check=GO_clusters,ontology = 'BP')
ancestor_plots<-wrapper_ancestor_curation_plots(GOs_ancestors_clust,sem_dta,return_plot=TRUE,target_dir=name_result_folder)
save(TS_object,file=paste0(name_result_folder,obj_name))
sdat <- sdat %>%
mutate(tissue.group = interaction(tissue, treatment)) %>%
mutate_if(is.character, as.factor)
# Create full DESeqDataSet
dds <- DESeqDataSetFromMatrix(countData = countMatrix,
colData = sdat,
design = ~ treatment + tissue)
### Remove genes with low counts
dds <- dds[ rowSums(counts(dds)) > 1, ]
# Run DESeq pipeline
design(dds) <- formula(~ tissue.group)
dsr1 <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Define group contrasts
group.contrasts <- tibble(num = c("Hyp_T2h","Hyp_T6h","LtH_6","LtH_7","Reox","Recovery",
"Hyp_T6h","Reox","Recovery","Reox","LtH_6","LtH_7"),
den = c("Control","Control","Control","Control","Control","Control",
"Hyp_T2h","Hyp_T6h","Hyp_T6h","Recovery","Hyp_T6h","Hyp_T6h"))
# Get DESeq results for all group contrasts for each batch
DE <- tidyr::crossing(tissue = c("A", "Pp"), group.contrasts) %>%
mutate(dsr = pmap(list(tissue, num, den), function(x, y, z) {
results(dsr1, contrast = c("tissue.group", paste0(x, ".", c(y, z))))}))
#Then, run model with no separation between batches
# Run DESeq pipeline
design(dds) <- formula(~ treatment)
dsr2 <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
# Get DESeq results for all contrasts and join with results from each batch, from above
DE <- tidyr::crossing(batch = "all", group.contrasts) %>%
mutate(dsr = map2(num, den, ~ results(dsr2, contrast = c("treatment", .x, .y)))) %>%
bind_rows(DE)
DE <- DE %>%
mutate(sig = map(dsr, ~ rownames_to_column(data.frame(.[which(.$padj < 0.05), ]), "gene")),
up = map(sig, ~ filter(., log2FoldChange > 0)),
down = map(sig, ~ filter(., log2FoldChange < 0)),
full = map(dsr, ~ rownames_to_column(data.frame(.), "gene")))
# Generate logP values for all differential expression contrasts
DE <- DE %>% mutate(logP = map(dsr, ~ data.frame(
gene = rownames(data.frame(.)),
logP = -log10(data.frame(.)$pvalue) * sign(data.frame(.)$log2FoldChange))))
# Count number of differentially expressed genes within each batch, and overall, for each contrast
DE_tab <- DE %>%
filter((num == "Hyp_T2h" & den == "Control")|(num == "Hyp_T6h" & den == "Control")|
(num == "LtH_6" & den == "Control")|(num == "LtH_7" & den == "Control")|
(num == "Reox" & den == "Control")|(num == "Recovery" & den == "Control")|
(num == "Hyp_T6h" & den == "Hyp_T2h")|(num == "Reox" & den == "Hyp_T6h")|
(num == "Recovery" & den == "Hyp_T6h")|(num == "Reox" & den == "Recovery")|
(num == "LtH_6" & den == "Hyp_T6h")|(num == "LtH_7" & den == "Hyp_T6h")) %>%
mutate(nsig = map_dbl( sig, ~ nrow(.)),
nup = map_dbl( up, ~ nrow(.)),
ndn = map_dbl(down, ~ nrow(.)),
`DEGs [up, down]` = paste0(nsig, " [", nup, ", ", ndn, "]")) %>%
mutate(ganglia = case_when(tissue == "A" ~ "abdominal", tissue == "Pp" ~ "pleural/pedal",
tissue == "all" ~ "all ganglia")) %>%
mutate(ganglia = factor(ganglia, levels = c("abdominal", "pleural/pedal", "all ganglia"))) %>%
dplyr::select(ganglia, num, den, `DEGs [up, down]`) %>%
spread(ganglia, `DEGs [up, down]`)
DE_tab$contrast <- paste(DE_tab$num, DE_tab$den, sep = " vs ")
dek <- DE_tab %>% dplyr::select(c(6,3:5)) %>%
knitr::kable(format = "markdown", caption = "Number of differentially expressed genes per ganglia and across all ganglia for each specified contrast. DEGs identified using DESeq with an adjusted p-value < 0.05. In brackets are the numbers of significantly up- and down-regulated genes.", row.names = )
dek
| contrast | abdominal | pleural/pedal | |
|---|---|---|---|
| Hyp_T2h vs Control | 3663 [1153, 2510] | 303 [141, 162] | 2119 [540, 1579] |
| Hyp_T6h vs Control | 3918 [1344, 2574] | 204 [131, 73] | 2169 [657, 1512] |
| Hyp_T6h vs Hyp_T2h | 243 [210, 33] | 6 [4, 2] | 130 [112, 18] |
| LtH_6 vs Control | 4029 [1371, 2658] | 56 [41, 15] | 1084 [205, 879] |
| LtH_6 vs Hyp_T6h | 282 [73, 209] | 502 [181, 321] | 254 [87, 167] |
| LtH_7 vs Control | 3175 [1144, 2031] | 280 [147, 133] | 1200 [311, 889] |
| LtH_7 vs Hyp_T6h | 208 [60, 148] | 697 [305, 392] | 260 [135, 125] |
| Recovery vs Control | 2853 [689, 2164] | 48 [40, 8] | 1583 [323, 1260] |
| Recovery vs Hyp_T6h | 371 [100, 271] | 206 [120, 86] | 508 [218, 290] |
| Reox vs Control | 3644 [1244, 2400] | 728 [471, 257] | 2365 [840, 1525] |
| Reox vs Hyp_T6h | 93 [11, 82] | 4 [1, 3] | 39 [7, 32] |
| Reox vs Recovery | 213 [144, 69] | 254 [141, 113] | 386 [194, 192] |
DE %>%
unnest(full) %>%
filter(tissue == "A") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(., gene, log2FoldChange, padj, contrast) %>%
write_csv(., file = "gene_expression/output/DE_wild/Abdominal/DE_raw_data.csv")
DE %>%
unnest(full) %>%
filter(tissue == "Pp") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(., gene, log2FoldChange, padj, contrast) %>%
write_csv(., file = "gene_expression/output/DE_wild/Pleural/DE_raw_data.csv")
res_A <- DE %>%
unnest(sig) %>%
filter(tissue == "A") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(.,contrast, gene, log2FoldChange)
write_csv(res_A, file = "gene_expression/output/DE_wild/Abdominal/DE_sig_data.csv")
res_Pp <- DE %>%
unnest(sig) %>%
filter(tissue == "Pp") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(.,contrast, gene, log2FoldChange)
write_csv(res_A, file = "gene_expression/output/DE_wild/Pleural/DE_sig_data.csv")
DEGs_A <- DE %>%
unnest(sig) %>%
filter(tissue == "A") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
dplyr::select(.,contrast, gene)
DEGs_A_wide <- list(Hyp_T2h=pull(DEGs_A[which(DEGs_A$contrast=="Hyp_T2h.Control"),2]),
Hyp_T6h=pull(DEGs_A[which(DEGs_A$contrast=="Hyp_T6h.Control"),2]),
Hyp_T6d=pull(DEGs_A[which(DEGs_A$contrast=="LtH_6.Control"),2]),
Hyp_T7d=pull(DEGs_A[which(DEGs_A$contrast=="LtH_7.Control"),2]))
str(DEGs_A_wide)
## List of 4
## $ Hyp_T2h: chr [1:3663] "LOC118478065" "LOC101855301" "LOC101847694" "LOC118478385" ...
## $ Hyp_T6h: chr [1:3918] "LOC118478065" "LOC101847694" "LOC118478385" "LOC101850682" ...
## $ Hyp_T6d: chr [1:4029] "LOC118478065" "LOC101847694" "LOC101850446" "LOC101850682" ...
## $ Hyp_T7d: chr [1:3175] "LOC118478065" "LOC101847694" "LOC101848083" "LOC101852364" ...
gv3 <- ggvenn(DEGs_A_wide, fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv3
ggsave("gene_expression/output/DE_wild/Abdominal/DEGs_vennDiagram.png", gv3, height = 3, width = 5)
DEGs_Pp <- DE %>%
unnest(sig) %>%
filter(tissue == "Pp") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
dplyr::select(.,contrast, gene)
DEGs_Pp_wide <- list(Hyp_T2h=pull(DEGs_Pp[which(DEGs_Pp$contrast=="Hyp_T2h.Control"),2]),
Hyp_T6h=pull(DEGs_Pp[which(DEGs_Pp$contrast=="Hyp_T6h.Control"),2]),
Hyp_T6d=pull(DEGs_Pp[which(DEGs_Pp$contrast=="LtH_6.Control"),2]),
Hyp_T7d=pull(DEGs_Pp[which(DEGs_Pp$contrast=="LtH_7.Control"),2]))
str(DEGs_Pp_wide)
## List of 4
## $ Hyp_T2h: chr [1:303] "LOC101852128" "LOC101853981" "LOC101845657" "LOC101846389" ...
## $ Hyp_T6h: chr [1:204] "LOC101852128" "LOC101850213" "LOC101852365" "LOC101861245" ...
## $ Hyp_T6d: chr [1:56] "LOC101853653" "LOC101852892" "LOC101845814" "LOC100861465" ...
## $ Hyp_T7d: chr [1:280] "LOC101848083" "LOC101850682" "LOC101859675" "LOC101850373" ...
gv2 <- ggvenn(DEGs_Pp_wide, fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv2
ggsave("gene_expression/output/DE_wild/Pleural/DEGs_vennDiagram.png", gv2, height = 3, width = 5)
DEGs_wild <- DE %>%
unnest(sig) %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control", "LtH_6.Control", "LtH_7.Control")) %>%
dplyr::select(.,contrast, gene, tissue)
DEGs_wild_wide <- list(abdominal=pull(DEGs_wild[which(DEGs_wild$tissue=="A"),2]),
pleural=pull(DEGs_wild[which(DEGs_wild$tissue=="Pp"),2]))
str(DEGs_wild_wide)
## List of 2
## $ abdominal: chr [1:14785] "LOC118478065" "LOC101855301" "LOC101847694" "LOC118478385" ...
## $ pleural : chr [1:843] "LOC101852128" "LOC101853981" "LOC101845657" "LOC101846389" ...
gv3 <- ggvenn(DEGs_wild_wide, fill_color = c("#0073C2FF", "#EFC000FF"),
stroke_size = 0.5, set_name_size = 4, show_percentage = F, stroke_color = F
)
gv3
ggsave("gene_expression/output/DE_wild/DEGs_Wild_by_tissue_vennDiagram.png", gv3, height = 3, width = 5)
volcano_plots_A <- DE %>%
unnest(full) %>%
filter(tissue == "A") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(gene, log2FoldChange, padj, contrast) %>%
group_by(contrast) %>% # Group by contrast
nest() %>% # Nest data for each contrast
mutate(plots = map2(data, contrast, ~ volcanoplot_alt(.x, plot_title=.y)))
# View the plots
volcano_plots_A$plots
## [[1]]
## Warning: Removed 1194 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning: Removed 1194 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
## Warning: Removed 1591 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[4]]
## Warning: Removed 1987 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[5]]
## Warning: Removed 1591 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[6]]
## Warning: Removed 7937 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[7]]
## Warning: Removed 1194 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[8]]
## Warning: Removed 3971 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Save all plots to files
volcano_plots_A %>%
mutate(plot_paths = map2(plots, contrast, ~ ggsave(filename = paste0("gene_expression/output/DE_wild/Abdominal/",.y, "_volcano.png"), plot = .x)))
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Warning: There were 8 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `plot_paths = map2(...)`.
## ℹ In group 1: `contrast = "Hyp_T2h.Control"`.
## Caused by warning:
## ! Removed 1194 rows containing missing values or values outside the scale range
## (`geom_point()`).
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
## # A tibble: 8 × 4
## # Groups: contrast [8]
## contrast data plots plot_paths
## <chr> <list> <list> <list>
## 1 Hyp_T2h.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 2 Hyp_T6h.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 3 LtH_6.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 4 LtH_7.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 5 Recovery.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 6 Recovery.Hyp_T6h <tibble [20,467 × 3]> <gg> <chr [1]>
## 7 Reox.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 8 Reox.Hyp_T6h <tibble [20,467 × 3]> <gg> <chr [1]>
volcano_plots_Pp <- DE %>%
unnest(full) %>%
filter(tissue == "Pp") %>%
mutate(contrast = paste(num, den, sep = ".")) %>%
filter(contrast %in% c("Hyp_T2h.Control", "Hyp_T6h.Control",
"LtH_6.Control", "LtH_7.Control",
"Reox.Hyp_T6h", "Reox.Control",
"Recovery.Hyp_T6h", "Recovery.Control")) %>%
dplyr::select(gene, log2FoldChange, padj, contrast) %>%
group_by(contrast) %>% # Group by contrast
nest() %>% # Nest data for each contrast
mutate(plots = map2(data, contrast, ~ volcanoplot_alt(.x, plot_title=.y)))
# View the plots
volcano_plots_Pp$plots
## [[1]]
## Warning: Removed 13095 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[2]]
## Warning: Removed 12301 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[3]]
## Warning: Removed 12301 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[4]]
## Warning: Removed 11507 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[5]]
## Warning: Removed 15872 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[6]]
## Warning: Removed 9921 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[7]]
## Warning: Removed 11507 rows containing missing values or values outside the scale range
## (`geom_point()`).
##
## [[8]]
## Warning: Removed 11904 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Save all plots to files
volcano_plots_Pp %>%
mutate(plot_paths = map2(plots, contrast, ~ ggsave(filename = paste0("gene_expression/output/DE_wild/Pleural/",.y, "_volcano.png"), plot = .x)))
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Warning: There were 8 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `plot_paths = map2(...)`.
## ℹ In group 1: `contrast = "Hyp_T2h.Control"`.
## Caused by warning:
## ! Removed 13095 rows containing missing values or values outside the scale range
## (`geom_point()`).
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 7 remaining warnings.
## # A tibble: 8 × 4
## # Groups: contrast [8]
## contrast data plots plot_paths
## <chr> <list> <list> <list>
## 1 Hyp_T2h.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 2 Hyp_T6h.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 3 LtH_6.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 4 LtH_7.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 5 Recovery.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 6 Recovery.Hyp_T6h <tibble [20,467 × 3]> <gg> <chr [1]>
## 7 Reox.Control <tibble [20,467 × 3]> <gg> <chr [1]>
## 8 Reox.Hyp_T6h <tibble [20,467 × 3]> <gg> <chr [1]>
###label clusters
library(ComplexHeatmap)
## ========================================
## ComplexHeatmap version 2.21.1
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). Most of the arguments
## in the original pheatmap() are identically supported in the new function. You
## can still use the original function by explicitly calling pheatmap::pheatmap().
##
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
##
## pheatmap
#load("data/clusters_A.R")
#load("data/clusters_Pp.R")
###A heatmap
A.norm <- res.A_patterns %>%
filter(.,Tissue == "A") %>%
dplyr::select(genes, time, value, cluster) %>%
tidyr::spread(time, value) %>% column_to_rownames("genes")
A.norm[A.norm$cluster == 3,]$cluster <- "A1"
A.norm[A.norm$cluster == 8,]$cluster <- "A2"
A.norm[A.norm$cluster == 9,]$cluster <- "A3"
A.norm[A.norm$cluster == 12,]$cluster <- "A10"
A.norm[A.norm$cluster == 16,]$cluster <- "A4"
A.norm[A.norm$cluster == 18,]$cluster <- "A5"
A.norm[A.norm$cluster == 20,]$cluster <- "A11"
A.norm[A.norm$cluster == 24,]$cluster <- "A12"
A.norm[A.norm$cluster == 35,]$cluster <- "A6"
A.norm[A.norm$cluster == 61,]$cluster <- "A9"
A.norm[A.norm$cluster == 84,]$cluster <- "A7"
A.norm[A.norm$cluster == 89,]$cluster <- "A13"
A.norm[A.norm$cluster == 120,]$cluster <- "A8"
A.norm <- A.norm[c(1:4,8,5:7)]
# A1-A6 Up, A7-A9 Down
A_clust_UP <- c("A1","A2","A3","A4","A5","A6","A7","A8","A9")
A_clust_DOWN <- c("A10","A11","A12","A13")
A_clust <- c(
"3" = "A1",
"8" = "A2",
"9" = "A3",
"12" = "A10",
"16" = "A4",
"18" = "A5",
"20" = "A11",
"24" = "A12",
"35" = "A6",
"61" = "A9",
"84" = "A7",
"89" = "A13",
"120" = "A8"
)
res.A_patterns$clust <- factor(A_clust[as.character(res.A_patterns$cluster)], levels = c("A1","A2","A3","A4","A5",
"A6","A7","A8","A9","A10",
"A11","A12","A13"),
ordered = T)
A.norm$cluster <- factor(A.norm$cluster, levels = c("A1","A2","A3","A4","A5","A6","A7","A8","A9","A10","A11","A12","A13"), ordered = T)
res.A_clusters$clust <- factor(A_clust[as.character(res.A_clusters$cluster)], levels = c("A1","A2","A3","A4","A5","A6","A7","A8","A9","A10","A11","A12","A13"), ordered = T)
res.A_clusters <- res.A_clusters %>% mutate( direction =
ifelse(clust %in% c("A10","A11","A12","A13"), "DOWN","UP")
)
A_clusters <- split(res.A_clusters, f = res.A_clusters$clust)
names(A_clusters) <- unique(res.A_clusters$clust)
###Pp heatmap
Pp.norm <- res.Pp_patterns %>%
filter(.,Tissue == "Pp") %>%
dplyr::select(genes, time, value, cluster) %>%
tidyr::spread(time, value) %>% column_to_rownames("genes")
Pp.norm[Pp.norm$cluster == 6,]$cluster <- "P5"
Pp.norm[Pp.norm$cluster == 8,]$cluster <- "P1"
Pp.norm[Pp.norm$cluster == 9,]$cluster <- "P2"
Pp.norm[Pp.norm$cluster == 12,]$cluster <- "P3"
Pp.norm[Pp.norm$cluster == 14,]$cluster <- "P4"
Pp.norm <- Pp.norm[c(1:4,8,5:7)]
#P1 P2 Up
Pp_clust <- c(
"6" = "P5",
"8" = "P1",
"9" = "P2",
"12" = "P3",
"14" = "P4"
)
res.Pp_patterns$clust <- Pp_clust[as.character(res.Pp_patterns$cluster)]
res.Pp_clusters$clust <- Pp_clust[as.character(res.Pp_clusters$cluster)]
res.Pp_clusters <- res.Pp_clusters %>% mutate( direction =
ifelse(clust %in% c("P5"),"DOWN", "UP")
)
Pp_clusters <- split(res.Pp_clusters, f = res.Pp_clusters$clust)
names(Pp_clusters) <- unique(res.Pp_clusters$clust)
panel_fun = function(index) {
pushViewport(viewport(xscale = c(1,8), yscale = c(-2, 2)))
grid.rect()
#lines for each contributing gene
# for(i in seq_along(index)) {
# grid.lines(x = (c(7:11,11.95)-6.5)/6,y = (as.vector(A.norm[index[i],2:7])+2)/4, gp = gpar(col = "lightgrey"))
# }
#95% CI lines
grid.polygon(x = c( (c(6:12)-6)/6, rev(c(6:11,12)-6)/6),
y = c(
( colMeans(A.norm[index,2:8]) + 1.96 * colSds(as.matrix(A.norm[index,2:8]))+2)/4,
rev(( colMeans(A.norm[index,2:8]) - 1.96 * colSds(as.matrix(A.norm[index,2:8]))+2)/4)
),
gp = gpar(fill = "lightgrey", col = 0))
# grid.lines(x = (c(7:11,11.95)-6.5)/6,
# y = ( colMeans(A.norm[index,2:7]) + 1.96 * colSds(as.matrix(A.norm[index,2:7]))+2)/4,
# gp = gpar(col = "black", lty = 2, lwd = 2))
# grid.lines(x = (c(7:11,11.95)-6.5)/6,
# y = ( colMeans(A.norm[index,2:7]) - 1.96 * colSds(as.matrix(A.norm[index,2:7]))+2)/4,
# gp = gpar(col = "black", lty = 2, lwd = 2))
#eigen-trajectory
grid.lines(x = (c(6:11,11.95)-6)/6,y = (as.vector(colMeans(A.norm[index,2:8]))+2)/4, gp = gpar(col = "red", lwd = 2))
#genes in cluster
grid.text(x = 0.5, y = 0.94, label = paste0("n= ",length(index)), just = "center", gp = gpar(fontsize = 8))
#axes
if(length(index) == 163) grid.xaxis(gp = gpar(fontsize = 8))
grid.yaxis(gp = gpar(fontsize = 8), main = FALSE)
popViewport()
}
anno = anno_zoom(align_to = A.norm$cluster,
which = "row",
panel_fun = panel_fun,
size = 0.3,
gap = unit(0.3,"cm"),
width = unit(4, "cm"),
link_width = unit(1,"cm"))
ht.A.clusters <- Heatmap( matrix = A.norm[,2:8],
name = "scaled TPM",
cluster_rows = FALSE,
cluster_row_slices = FALSE,
cluster_columns = FALSE,
show_row_names = FALSE,
column_names_side = "top",
column_names_centered = TRUE,
column_names_rot = 90,
column_names_gp = gpar(fontsize = 8),
#width = unit(3, "cm"),
row_split = A.norm$cluster,
row_title_rot = 0,
#column_title = "Treatment",
right_annotation = rowAnnotation(cluster = anno),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(legend_direction = "horizontal",
title_position = "topcenter"),
heatmap_width = unit(8, "cm")
)
## Warning: The input is a data frame-like object, convert it to a matrix.
# svg(filename = "../figures/A.clusters.svg", width = 3.625, height = 8)
pdf(file = "gene_expression/figures/A.clusters.pdf", width = 4, height = 8)
draw(ht.A.clusters, heatmap_height = unit(19.5, "cm"), heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0.5), "cm"))
# decorate_row_title("scaled TPM",{
# grid.text("A", y = unit(1.8, "npc") , x = unit(0, "npc"), just = "left", gp = gpar(fontsize = 22)) })
dev.off()
## quartz_off_screen
## 2
panel_fun = function(index) {
pushViewport(viewport(xscale = c(1,8), yscale = c(-2, 2)))
grid.rect()
#lines for each contributing gene
# for(i in seq_along(index)) {
# grid.lines(x = (c(7:11,11.95)-6.5)/6,y = (as.vector(Pp.norm[index[i],2:7])+2)/4, gp = gpar(col = "lightgrey"))
# }
#95% CI lines
grid.polygon(x = c( (c(6:12)-6)/6, rev(c(6:11,12)-6)/6),
y = c(
( colMeans(Pp.norm[index,2:8]) + 1.96 * colSds(as.matrix(Pp.norm[index,2:8]))+2)/4,
rev(( colMeans(Pp.norm[index,2:8]) - 1.96 * colSds(as.matrix(Pp.norm[index,2:8]))+2)/4)
),
gp = gpar(fill = "lightgrey", col = 0))
# grid.lines(x = (c(7:11,11.95)-6.5)/6,
# y = ( colMeans(Pp.norm[index,2:7]) + 1.96 * colSds(as.matrix(Pp.norm[index,2:7]))+2)/4,
# gp = gpar(col = "black", lty = 2, lwd = 2))
# grid.lines(x = (c(7:11,11.95)-6.5)/6,
# y = ( colMeans(Pp.norm[index,2:7]) - 1.96 * colSds(as.matrix(Pp.norm[index,2:7]))+2)/4,
# gp = gpar(col = "black", lty = 2, lwd = 2))
#eigen-trajectory
grid.lines(x = (c(6:11,11.95)-6)/6,y = (as.vector(colMeans(Pp.norm[index,2:8]))+2)/4, gp = gpar(col = "red", lwd = 2))
#genes in cluster
grid.text(x = 0.5, y = 0.94, label = paste0("n= ",length(index)), just = "center", gp = gpar(fontsize = 8))
#axes
if(length(index) == 163) grid.xaxis(gp = gpar(fontsize = 8))
grid.yaxis(gp = gpar(fontsize = 8), main = FALSE)
popViewport()
}
anno = anno_zoom(align_to = Pp.norm$cluster,
which = "row",
panel_fun = panel_fun,
size = 0.3,
gap = unit(0.3,"cm"),
width = unit(4, "cm"),
link_width = unit(1,"cm"))
ht.Pp.clusters <- Heatmap( matrix = Pp.norm[,2:8],
name = "scaled_TPM",
cluster_rows = FALSE,
cluster_row_slices = FALSE,
cluster_columns = FALSE,
show_row_names = FALSE,
column_names_side = "bottom",
column_names_centered = TRUE,
column_names_rot = 90,
column_names_gp = gpar(fontsize = 8),
#width = unit(3, "cm"),
row_split = Pp.norm$cluster,
row_title_rot = 0,
#column_title = "Treatment",
right_annotation = rowAnnotation(cluster = anno),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(legend_direction = "horizontal",
title_position = "topcenter"),
heatmap_width = unit(7.8, "cm"),
)
## Warning: The input is a data frame-like object, convert it to a matrix.
# svg(filename = "../figures/Pp.clusters.svg", width = 3.625, height = 8)
pdf(file = "gene_expression/figures/Pp.clusters.pdf", width = 4, height = 5)
draw(ht.Pp.clusters, heatmap_height = unit(7.5, "cm"), heatmap_legend_side = "bottom", padding = unit(c(0, 0, 0, 0.5), "cm"))
# decorate_row_title("scaled_TPM",{
# grid.text("B", y = unit(1.6, "npc") , x = unit(0, "npc"), just = "left", gp = gpar(fontsize = 22)) })
dev.off()
## quartz_off_screen
## 2